This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
class: inverse, middle, center
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins)
penguins <- remove_missing(penguins)
## Warning: Removed 11 rows containing missing values.
model <- lm(body_mass_g ~ flipper_length_mm + species, data = penguins)
penguins %>%
mutate(fitted = model$fitted.values) ->
penguins_w_fitted
penguins_w_fitted %>%
ggplot() +
aes(x = flipper_length_mm) +
aes(y = body_mass_g) +
aes(color = species) +
geom_point() +
geom_line(aes(y = fitted, group = species))
compute_panel_ols_ind <- function(data, scales, formula = y ~ x + indicator) {
model <- lm(formula = formula,
data = data)
data.frame(x = data$x,
y = model$fitted.values,
indicator = data$indicator,
xend = data$x,
yend = data$y)
}
–
StatLmindicator <- ggplot2::ggproto("StatLmindicator",
ggplot2::Stat,
compute_panel = compute_panel_ols_ind,
required_aes = c("x", "y", "indicator"),
default_aes = ggplot2::aes(
group = ggplot2::after_stat(indicator),
color = indicator))
–
geom_lm_indicator <- function(mapping = NULL, data = NULL,
position = "identity", na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatLmindicator, # proto object from Step 2
geom = ggplot2::GeomLine, # inherit other behavior
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
ggplot(palmerpenguins::penguins) +
aes(x = flipper_length_mm ) +
aes(y = body_mass_g ) +
geom_point() +
aes(color = species) +
aes(indicator = species) +
geom_lm_indicator()
## Warning: Removed 2 rows containing non-finite values (`stat_lmindicator()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
ggplot(palmerpenguins::penguins) +
aes(x = flipper_length_mm ) +
aes(y = body_mass_g ) +
geom_point() +
aes(color = species) +
aes(indicator = species) +
geom_lm_indicator(formula = y ~ x * indicator)
## Warning: Removed 2 rows containing non-finite values (`stat_lmindicator()`).
## Removed 2 rows containing missing values (`geom_point()`).
ggplot(palmerpenguins::penguins) +
aes(x = flipper_length_mm ) +
aes(y = body_mass_g ) +
geom_point() +
aes(color = species) +
aes(indicator = species) +
geom_lm_indicator(formula = y ~ I(x^3) + I(x^2) + x + indicator)
## Warning: Removed 2 rows containing non-finite values (`stat_lmindicator()`).
## Removed 2 rows containing missing values (`geom_point()`).
ggplot(palmerpenguins::penguins) +
aes(x = flipper_length_mm ) +
aes(y = body_mass_g ) +
geom_point() +
aes(color = species) +
aes(indicator = species) +
geom_lm_indicator(formula = y ~ x + indicator) +
facet_wrap(~sex)
## Warning: Removed 2 rows containing non-finite values (`stat_lmindicator()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
library(tidyverse)
library(gapminder)
gapminder %>%
filter(continent == "Americas") %>%
filter(year == 2002) %>%
select(country, pop) ->
prep
packcircles::circleProgressiveLayout(prep$pop,
sizetype = 'area') ->
pack
cbind(prep, pack) %>%
mutate(id = row_number())
## country pop x y radius id
## 1 Argentina 38331121 -3493.0180 0.0000 3493.0180 1
## 2 Bolivia 8445134 1639.5639 0.0000 1639.5639 2
## 3 Brazil 179914212 2732.7743 -9142.0260 7567.5936 3
## 4 Canada 31902268 1150.7522 4801.4069 3186.6608 4
## 5 Chile 15497046 5273.8171 1302.3806 2221.0049 5
## 6 Colombia 41008227 10562.3302 -1160.6508 3612.9384 6
## 7 Costa Rica 3834934 -4573.1172 -4469.2048 1104.8518 7
## 8 Cuba 11226999 -7453.2262 -3646.6547 1890.4139 8
## 9 Dominican Republic 8650322 -8636.6596 -299.9554 1659.3622 9
## 10 Ecuador 12921234 -7908.2513 3314.7888 2028.0425 10
## 11 El Salvador 6353681 -4769.2005 4746.5765 1422.1250 11
## 12 Guatemala 11178650 -3084.4567 7593.9564 1886.3390 12
## 13 Haiti 7607651 -133.6785 9366.9797 1556.1460 13
## 14 Honduras 6677328 2869.9023 9116.0827 1457.8956 14
## 15 Jamaica 2664659 4409.2334 7302.3944 920.9708 15
## 16 Mexico 102479927 10986.7220 8154.0495 5711.4249 16
## 17 Nicaragua 5146848 -5728.2786 -6555.5701 1279.9580 17
## 18 Panama 2990875 -7982.0775 -6463.5726 975.7177 18
## 19 Paraguay 5884491 -6893.5704 6556.3419 1368.6094 19
## 20 Peru 26769436 -6631.4420 10836.0023 2919.0711 20
## 21 Puerto Rico 3859606 -2614.0309 10551.5165 1108.4001 21
## 22 Trinidad and Tobago 1101832 -1083.9386 11293.7595 592.2196 22
## 23 United States 287675526 -18526.1513 -6598.5236 9569.2196 23
## 24 Uruguay 3363085 -11041.9619 913.3993 1034.6512 24
## 25 Venezuela 24287670 4129.4010 13162.9819 2780.4686 25
pack %>%
packcircles::circleLayoutVertices(npoints = 50) ->
circle_outlines
circle_outlines %>%
ggplot() +
aes(x = x, y = y) +
geom_polygon(colour = "black", alpha = 0.6) +
aes(group = id) +
aes(fill = factor(id)) +
geom_text(data = cbind(prep, pack),
aes(x, y, size = pop, label = country,
group = NULL, fill = NULL)) +
theme(legend.position = "none") +
coord_equal()
# you won't use the scales argument, but ggplot will later
compute_panel_circle_pack <- function(data, scales){
data %>%
mutate(id = row_number()) ->
data1
if(is.null(data$area)){
data1 %>%
mutate(area = 1) ->
data1
}
data1 %>%
pull(area) %>%
packcircles::circleProgressiveLayout(
sizetype = 'area') %>%
packcircles::circleLayoutVertices(npoints = 300) %>%
left_join(data1) #%>%
# rename(group = id)
}
# step 1b test the computation function
gapminder::gapminder %>%
filter(continent == "Americas") %>%
filter(year == 2002) %>%
# input must have required aesthetic inputs as columns
rename(area = pop) %>%
compute_panel_circle_pack() %>%
head()
## Joining with `by = join_by(id)`
## x y id country continent year lifeExp area gdpPercap
## 1 0.0000000 0.00000 1 Argentina Americas 2002 74.34 38331121 8797.641
## 2 -0.7660766 73.15225 1 Argentina Americas 2002 74.34 38331121 8797.641
## 3 -3.0639703 146.27241 1 Argentina Americas 2002 74.34 38331121 8797.641
## 4 -6.8926731 219.32842 1 Argentina Americas 2002 74.34 38331121 8797.641
## 5 -12.2505058 292.28821 1 Argentina Americas 2002 74.34 38331121 8797.641
## 6 -19.1351181 365.11980 1 Argentina Americas 2002 74.34 38331121 8797.641
gapminder::gapminder %>%
filter(continent == "Americas") %>%
filter(year == 2002) %>%
# input must have required aesthetic inputs as columns
rename(area = pop) %>%
compute_panel_circle_pack() %>%
str()
## Joining with `by = join_by(id)`
## 'data.frame': 7525 obs. of 9 variables:
## $ x : num 0 -0.766 -3.064 -6.893 -12.251 ...
## $ y : num 0 73.2 146.3 219.3 292.3 ...
## $ id : int 1 1 1 1 1 1 1 1 1 1 ...
## $ country : Factor w/ 142 levels "Afghanistan",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ year : int 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 ...
## $ lifeExp : num 74.3 74.3 74.3 74.3 74.3 ...
## $ area : int 38331121 38331121 38331121 38331121 38331121 38331121 38331121 38331121 38331121 38331121 ...
## $ gdpPercap: num 8798 8798 8798 8798 8798 ...
# step 1b test the computation function
gapminder::gapminder %>%
filter(continent == "Americas") %>%
filter(year == 2002) %>%
# input must have required aesthetic inputs as columns
rename(area = pop) %>%
compute_panel_circle_pack() %>%
ggplot() +
aes(x = x, y = y, fill = country) +
geom_polygon()
## Joining with `by = join_by(id)`
my_setup_data <- function(data, params){
if(data$group[1] == -1){
nrows <- nrow(data)
data$group <- seq_len(nrows)
}
data
}
StatCirclepack <- ggplot2::ggproto(`_class` = "StatCirclepack",
`_inherit` = ggplot2::Stat,
required_aes = c("id"),
compute_panel = compute_panel_circle_pack#,
# setup_data = my_setup_data,
# default_aes = aes(fill = after_stat(area))
)
geom_polygon_circlepack <- function(mapping = NULL, data = NULL,
position = "identity", na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatCirclepack, # proto object from Step 2
geom = ggplot2::GeomPoint, # inherit other behavior
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
gapminder::gapminder %>%
filter(year == 2002) %>%
ggplot() +
aes(id = country) +
geom_polygon_circlepack(alpha = .5, size = .002)
## Joining with `by = join_by(id)`
last_plot() +
aes(color = continent)
## Joining with `by = join_by(id)`
last_plot() +
aes(area = pop)
## Joining with `by = join_by(id)`
last_plot() +
aes(color = continent) +
facet_wrap(facets = vars(continent))
## Joining with `by = join_by(id)`
## Joining with `by = join_by(id)`
## Joining with `by = join_by(id)`
## Joining with `by = join_by(id)`
## Joining with `by = join_by(id)`
library(tidyverse)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
fips_geometries <- readRDS(url("https://wilkelab.org/SDS375/datasets/US_counties.rds")) %>%
rename(FIPS = GEOID)
US_census <- read_csv("https://wilkelab.org/SDS375/datasets/US_census.csv",
col_types = cols(FIPS = "c")
)
# from Claus Wilke on ggplot2
fips_geometries %>%
left_join(US_census, by = "FIPS") %>%
ggplot() +
geom_sf(aes(fill = mean_work_travel), linewidth = .1) +
scale_fill_viridis_c(option = "magma") ->
classic_plot_sf_layer
classic_plot_sf_layer
layer_data(classic_plot_sf_layer) %>%
select(geometry, xmin, xmax, ymin, ymax) %>%
bind_cols(tibble(FIPS = fips_geometries$FIPS)) %>%
rename(fips = FIPS) ->
reference
compute_panel_county <- function(data, scales){
data %>%
# inner_join(fips_ggplot2_reference)
inner_join(reference, multiple = "all") %>%
mutate(group = -1)
}
StatCounty <- ggplot2::ggproto(`_class` = "StatCounty",
`_inherit` = ggplot2::Stat,
# required_aes = c("fips"),
# setup_data = my_setup_data,
compute_panel = compute_panel_county,
default_aes = aes(geometry = after_stat(geometry))
)
geom_sf_county <- function(
mapping = NULL,
data = NULL,
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatCounty, # proto object from step 2
geom = ggplot2::GeomSf, # inherit other behavior
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
read_csv("https://wilkelab.org/SDS375/datasets/US_census.csv",
col_types = cols(FIPS = "c")) %>%
ggplot() +
aes(fips = FIPS) +
geom_sf_county(linewidth = .02,
color = "darkgrey") +
aes(fill = mean_work_travel) +
coord_sf() +
scale_fill_viridis_c(option = "magma")
## Joining with `by = join_by(fips)`